---
title: "GOV 2001/STAT E-200: Replication Paper Draft"
author: "Due nov  27, 2022"
output:
  pdf_document: default
  word_document: default
  html_document: default
header-includes:
- \usepackage[labelformat = empty]{caption}
- \usepackage[textfont = bf]{caption}
- \usepackage{graphicx}
- \usepackage{float}
- \usepackage{dcolumn}
fig_width: 3
fig_height: 2
urlcolor: blue
---
Tomonoshin Aoki
Group members: Wyatt, Motunrayo


setwd("C:/Users/taoki/Documents/US/Harvard Chan/fall/GOV 2001 Quantitative Social Science Methods I/collaboration/PS6/dataverse_files")

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

Information about the paper:

Original paper title: The politics of pain: Medicaid expansion, the ACA and the opioid epidemic

Dataverse link: https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/SUPEL1&version=1.0
Journal published: Journal of Public Policy 




```{r}
rm(list=ls())
#had to install US map
library(usmap)
library(readr)
library(haven)
library(foreign)
library(tidyr)
library(dplyr)
library(rdrobust) 
library(rddensity)
library(lmtest)
library(ggplot2)
library(dotwhisker)
library(tidyverse)

library(lmtest)
library(sandwich)

setwd("/Users/jwyattkoma/Documents/Harvard Fall 2022 Coursework/GOV 2001 Stats/replication paper")
#setwd("C:/Users/taoki/Documents/US/Harvard Chan/fall/GOV 2001 Quantitative Social Science Methods I/collaboration/rep")


```






```{r}
###################################################
#       Loading the Data
###################################################

# Border sample of counties within 100 miles of opposing Medicaid expansion border
grdborder <- read.csv("opioid_analyses_data.csv")
# subset of original data for Republican-leaning states 
similarstates <- read.csv("redstates_analyses_data.csv")
# Data on when states expanded Medicaid
medicaidexpansionyear <- read_csv("medicaidexpansionyear.csv")
# Trump tweets about health
tweets <- read.csv("trump_electionyeartweets.csv")
# NYT Opioid Data
nyt <- read.csv("nyt_opioid_mentions.csv")

#county-level election data
swing <- read_csv("countypres_2000-2020.csv")

```


```{r}
#rename county FIPS to FIPS
swing<-swing %>% 
  rename(fips=county_fips)

#create a variable for the political party that has the max number of votes in the election
swing<-swing %>% 
  group_by(year,fips) %>% 
  summarise(mainparty=.$party[which.max(candidatevotes)])

#create a new variable by year for the party that got the main number of Pres votes in each county
swing2000<-swing %>% 
  filter(year==2000) %>% 
  mutate(party2000=mainparty) %>% 
  ungroup() %>% 
  select(fips,party2000)

swing2004<-swing %>% 
  filter(year==2004) %>% 
  mutate(party2004=mainparty) %>% 
  ungroup() %>% 
  select(fips,party2004)

swing2008<-swing %>% 
  filter(year==2008) %>% 
  mutate(party2008=mainparty) %>% 
  ungroup() %>% 
  select(fips,party2008)

swing2012<-swing %>% 
  filter(year==2012) %>% 
  mutate(party2012=mainparty) %>% 
  ungroup() %>% 
  select(fips,party2012)

swing2016<-swing %>% 
  filter(year==2016) %>% 
  mutate(party2016=mainparty) %>% 
  ungroup() %>% 
  select(fips,party2016)

swing2020<-swing %>% 
  filter(year==2020) %>% 
  mutate(party2020=mainparty) %>% 
  ungroup() %>% 
  select(fips,party2020)

#merge objects that include counties by election years by county FIPS code
swing_join<-left_join(swing2000,swing2004,by="fips")
swing_join<-left_join(swing_join,swing2008,by="fips")
swing_join<-left_join(swing_join,swing2012,by="fips")
swing_join<-left_join(swing_join,swing2016,by="fips")
swing_join<-left_join(swing_join,swing2020,by="fips")

###
###create a new variable in an election year compared to next election year to determine swing status
###Ignore green party and other party for simplicity
###

swing_join<-swing_join %>% 
  mutate("swing2000to2004"=case_when(
    party2000=="REPUBLICAN"&party2004=="REPUBLICAN" ~ "RR",
    party2000=="DEMOCRAT"&party2004=="DEMOCRAT" ~ "DD",
    party2000=="DEMOCRAT"&party2004=="REPUBLICAN" ~ "DR",
    party2000=="REPUBLICAN"&party2004=="DEMOCRAT" ~ "RD",
    TRUE ~"other"
  ))

swing_join<-swing_join %>% 
  mutate("swing2004to2008"=case_when(
    party2004=="REPUBLICAN"&party2008=="REPUBLICAN" ~ "RR",
    party2004=="DEMOCRAT"&party2008=="DEMOCRAT" ~ "DD",
    party2004=="DEMOCRAT"&party2008=="REPUBLICAN" ~ "DR",
    party2004=="REPUBLICAN"&party2008=="DEMOCRAT" ~ "RD",
    TRUE ~"other"
  ))

swing_join<-swing_join %>% 
  mutate("swing2008to2012"=case_when(
    party2008=="REPUBLICAN"&party2012=="REPUBLICAN" ~ "RR",
    party2008=="DEMOCRAT"&party2012=="DEMOCRAT" ~ "DD",
    party2008=="DEMOCRAT"&party2012=="REPUBLICAN" ~ "DR",
    party2008=="REPUBLICAN"&party2012=="DEMOCRAT" ~ "RD",
    TRUE ~"other"
  ))

swing_join<-swing_join %>% 
  mutate("swing2012to2016"=case_when(
    party2012=="REPUBLICAN"&party2016=="REPUBLICAN" ~ "RR",
    party2012=="DEMOCRAT"&party2016=="DEMOCRAT" ~ "DD",
    party2012=="DEMOCRAT"&party2016=="REPUBLICAN" ~ "DR",
    party2012=="REPUBLICAN"&party2016=="DEMOCRAT" ~ "RD",
    TRUE ~"other"
  ))

swing_join<-swing_join %>% 
  mutate("swing2016to2020"=case_when(
    party2016=="REPUBLICAN"&party2020=="REPUBLICAN" ~ "RR",
    party2016=="DEMOCRAT"&party2020=="DEMOCRAT" ~ "DD",
    party2016=="DEMOCRAT"&party2020=="REPUBLICAN" ~ "DR",
    party2016=="REPUBLICAN"&party2020=="DEMOCRAT" ~ "RD",
    TRUE ~"other"
  ))


#change FIPS to integer in swing_join dataset to allow merge to main file
swing_join$fips <- as.integer(swing_join$fips)

#Merge county-level indicator for swing status to be in main analysis the author create
grdborder<-left_join(grdborder,swing_join,by="fips")


###
##Take a look at the swing counties from 08-2012
###
table(grdborder$swing2008to2012)

###
##create a new variable rolling up DR and RD swing counties to overall for sample size issues
### (only 4 RD counties)

grdborder<-grdborder %>% 
  mutate("swing_indicator_08to12"=case_when(
    swing2008to2012== "DD" | swing2008to2012== "RR" ~ 'non-swing county',
        swing2008to2012== "DR" | swing2008to2012== "RD" ~ 'swing county',
          TRUE ~"other"  ))

table(grdborder$swing2008to2012,grdborder$swing_indicator_08to12)

# made  the swing counties from 04-2008
grdborder<-grdborder %>% 
  mutate("swing_indicator_04to08"=case_when(
    swing2004to2008== "DD" | swing2004to2008== "RR" ~ 'non-swing county04to08',
        swing2004to2008== "DR" | swing2004to2008== "RD" ~ 'swing county04to08',
          TRUE ~"other"  ))

table(grdborder$swing_indicator_04to08)
table(grdborder$swing2004to2008,grdborder$swing_indicator_04to08)

# made  the swing counties from 2000-2012
#grdborder<-grdborder %>% 
#  mutate("swing"=case_when(
#    swing2000to2004== "DD"&swing2004to2008== "DD"&swing2008to2012== "DD" ~ 'always_D',
#    swing2000to2004== "RR"&swing2004to2008== "RR"&swing2008to2012== "RR" ~ 'always_R',
#          TRUE ~'swing'  ))

#grdborder<-grdborder %>% 
#  mutate("always_D" =case_when(
#    swing== "always_D" ~ 1,
#          TRUE ~ 0  ))

#grdborder<-grdborder %>% 
#  mutate("always_R" =case_when(
#    swing== "always_R" ~ 1,
 #         TRUE ~ 0  ))

#grdborder<-grdborder %>% 
#  mutate("ind_swing" =case_when(
#    swing== "swing" ~ 1,
#          TRUE ~ 0  ))



# Create variable that identifies counties that changed from Dem to R between 04 and 08 presidential election 
#and the 08 to 12 election

grdborder<-grdborder %>% 
  mutate("swing"=case_when(
    swing2004to2008== "DD"&swing2008to2012== "DD" ~ 'always_D',
    swing2004to2008== "RR"&swing2008to2012== "RR" ~ 'always_R',
          TRUE ~'swing'  ))

grdborder<-grdborder %>% 
  mutate("always_D" =case_when(
    swing== "always_D" ~ 1,
          TRUE ~ 0  ))

grdborder<-grdborder %>% 
  mutate("always_R" =case_when(
    swing== "always_R" ~ 1,
          TRUE ~ 0  ))

grdborder<-grdborder %>% 
  mutate("ind_swing" =case_when(
    swing== "swing" ~ 1,
          TRUE ~ 0  ))



```



```{r}
###################################################
#                 Maps 
###################################################
# Expansion Status Maps for Full and Border Samples

# Prepping data for plots (defining when a case expanded and whether included in border)
medicaidexpansionyear$expanded_15 <- NA
medicaidexpansionyear$expanded_15[medicaidexpansionyear$year_expanded<=2015] <- 1
medicaidexpansionyear$expanded_15[medicaidexpansionyear$year_expanded>=2016] <- 0
medicaidexpansionyear$expanded_15[medicaidexpansionyear$medicaid_expansion==0] <- 0
medicaidexpansionyear$state <- medicaidexpansionyear$state_abv
medicaidexpansionyear$Expansion[medicaidexpansionyear$expanded_15==1] <- "Yes"
medicaidexpansionyear$Expansion[medicaidexpansionyear$expanded_15==0] <- "No"


### Full Sample Map 

usmap::plot_usmap(data = medicaidexpansionyear, values = "Expansion")  + 
  scale_fill_manual(name = "Expanded Medicaid", 
                    labels = c("Yes" = "Yes", 
                               "No" = "No"),
                    values = c("Yes" = "#2D2D2D", 
                               "No" = "#D0D0D0"), na.translate=FALSE) + theme(legend.position="right") 


## Border Sample Map 

usmap::plot_usmap(include = c("AR", "AZ", "CO", "IA", "ID", "IL", "KS", "KY", "LA", "MD", "ME", "MI",
                       "MN", "MO", "MS", "MT", "ND", "NM", 
                       "NE", "NH", "NV", "OK", "OR", "SD", "TN", "TX", "UT",
                       "VA", "WA", "WI", "WV", "WY"),
           data = medicaidexpansionyear, values = "expanded_15") +  scale_fill_continuous(low="#D0D0D0", high="#2D2D2D",name = "Expanded Medicaid (2015)", label = scales::comma) + 
  theme(legend.position = "none")
```


```{r}
##########################################
#    Research Design Assumption Plots
########################################## 

grdborder$Expansion <- as.factor(grdborder$z)

# Pre-trend discontinuity in partisanship? No! 
p <- ggplot(grdborder, aes(x = x, y = dm2008, color = Expansion)) +
  geom_point(size = 0.5, alpha = 0.5) + 
  # Add a line based on a linear model for the people scoring less than 70
  geom_smooth(data = filter(grdborder, x <= 0), method = "lm") +
  # Add a line based on a linear model for the people scoring 70 or more
  geom_smooth(data = filter(grdborder, x > 0), method = "lm") +
  geom_vline(xintercept = 0) +
  labs(x = "Distance to Medicaid Border", y = "Democratic Two-Party Vote (2008)", color = "Expansion")
p1 <- p +   scale_colour_grey()

p1

# Pre-trend discontinuity in opioids? No! 
p <- ggplot(grdborder, aes(x = x, y = opioidrate_10, color = Expansion)) +
  geom_point(size = 0.5, alpha = 0.5) + 
  # Add a line based on a linear model for the people scoring less than 70
  geom_smooth(data = filter(grdborder, x <= 0), method = "lm") +
  # Add a line based on a linear model for the people scoring 70 or more
  geom_smooth(data = filter(grdborder, x > 0), method = "lm") +
  geom_vline(xintercept = 0) +
  labs(x = "Distance to Medicaid Border", y = "Opioid Prescription Rate (2010)", color = "Expansion")
p1 <- p +   scale_colour_grey()

p1
```

```{r}
##########################################
#    Research Design Assumption Plots
########################################## 

# Pre-trend discontinuity in opioids? No! 
p <- ggplot(grdborder, aes(x = x, y = opioidrate_14, color = Expansion)) +
  geom_point(size = 0.5, alpha = 0.5) + 
  # Add a line based on a linear model for the people scoring less than 70
  geom_smooth(data = filter(grdborder, x <= 0), method = "lm") +
  # Add a line based on a linear model for the people scoring 70 or more
  geom_smooth(data = filter(grdborder, x > 0), method = "lm") +
  geom_vline(xintercept = 0) +
  labs(x = "Distance to Medicaid Border", y = "Opioid Prescription Rate (2010)", color = "Expansion")
p1 <- p +   scale_colour_grey()

p1
```





```{r}
###################################################
#       Effect of ME on Opioid Plot
###################################################

# Impact of Medicaid on Opioids GRD Plot found in the manuscript

grdborder$Expansion2[grdborder$z==1] <- "Yes"
grdborder$Expansion2[grdborder$z==0] <- "No"

grdborder$Expansion2 <- as.factor(grdborder$Expansion2)

p1=ggplot(grdborder, aes(x = x, y = deltaopioids16_14, colour=Expansion2)) 
p2=p1 + stat_smooth(method = lm)
p3=p2+theme_bw()
p4=p3 + scale_color_grey(start=0.4, end=0.2)+ scale_color_grey(start=0.4, end=0.2)+ coord_cartesian( ylim = c(-15, 15))
p5 <- p4 + ylab("Change in Opioid Rate (2016-2014)") + xlab("Distance to Expansion Border") + labs(color="Expansion") 
p6 <- p5 + geom_vline(xintercept=c(0), linetype="dotted")
p6
```


```{r}
###################################################
#       Effect of ME on Opioid Plot
###################################################

# Impact of Medicaid on Opioids GRD Plot found in the manuscript


grdborder1<-grdborder %>% 
  mutate(deltaopioids10_8=opioidrate_10-opioidrate_08)


grdborder1$Expansion2[grdborder$z==1] <- "Yes"
grdborder1$Expansion2[grdborder$z==0] <- "No"

grdborder1$Expansion2 <- as.factor(grdborder1$Expansion2)

p1=ggplot(grdborder1, aes(x = x, y = deltaopioids10_8, colour=Expansion2)) 
p2=p1 + stat_smooth(method = lm)
p3=p2+theme_bw()
p4=p3 + scale_color_grey(start=0.4, end=0.2)+ coord_cartesian( ylim = c(-15, 15))
p5 <- p4 + ylab("Change in Opioid Rate (2010-2008)") + xlab("Distance to Expansion Border") + labs(color="Expansion") 
p6 <- p5 + geom_vline(xintercept=c(0), linetype="dotted")
p6
```


```{r}
###################################################
#            Elections Analyses 
###################################################
#install.packages('estimate')
#library(estimate)
library(stargazer)


#######################Column 1

######
###### ORIGINAL ANALYSIS
######
# Code used to produce main regression table in the main text 

# Opioid Increase (Regression Table 1, Column 1)
med <- lm(shift_16_12 ~  increase*z + x*z + 
            I(x^2)*z  +dm2004 +  stateabbr, data = grdborder, weights=vap2016)
#summary(med)
# heteroskedasticity-consistent standard errors 
#med<- coeftest(med, vcov = vcov(med, type = "HC0"))
#med2


######
###### REPLICATION ANALYSIS (with swing status indicator added that identifies 2004-2012 swing counties)
######

# Opioid Increase (Regression Table 1, Column 1) - Categorical swing variable (always D, always R, swing)
med_col1_swing <- lm(shift_16_12 ~  increase*z*swing + x*z + 
            I(x^2)*z  +dm2004 +  stateabbr, data = grdborder, weights=vap2016)
#summary(med_col1_swing)
#med_col1_swing <- coeftest(med_col1_swing, vcov = vcov(med_col1_swing, type = "HC0"))

###
### This code below runs the same regression as above but uses a binary indicator which makes it easier to understand
###

# Opioid Increase (Regression Table 1, Column 1) - Binary indicator for Always repub
med_col1_swing_alwaysr <- lm(shift_16_12 ~  increase*z*always_R + x*z + 
            I(x^2)*z  +dm2004 +  stateabbr, data = grdborder, weights=vap2016)

#med_col1_swing_alwaysr <- coeftest(med_col1_swing_alwaysr, vcov = vcov(med_col1_swing_alwaysr, type = "HC0"))

#summary(med_col1_swing_alwaysr)

# Opioid Increase (Regression Table 1, Column 1)- Binary indicator for Always Dem
med_col1_swing_alwaysd <- lm(shift_16_12 ~  increase*z*always_D + x*z + 
            I(x^2)*z  +dm2004 +  stateabbr, data = grdborder, weights=vap2016)

#med_col1_swing_alwaysd <- coeftest(med_col1_swing_alwaysd, vcov = vcov(med_col1_swing_alwaysd, type = "HC0"))
#summary(med_col1_swing_alwaysd)

# Opioid Increase (Regression Table 1, Column 1)- Binary indicator for Swing
med_col1_swing_ind_swing<- lm(shift_16_12 ~  increase*z*ind_swing + x*z + 
            I(x^2)*z  +dm2004 +  stateabbr, data = grdborder, weights=vap2016)
#med_col1_swing_ind_swing <- coeftest(med_col1_swing_ind_swing, vcov = vcov(med_col1_swing_ind_swing, type = "HC0"))
#summary(med_col1_swing_ind_swing)



stargazer(
      #Column for original data from journal replication
        med, 
      #Column for regression adjusted for counties that were always republican 
        med_col1_swing_alwaysr,
      #Column for regression adjusted for counties that were always Dem 
        med_col1_swing_alwaysd,
      #Column for regression adjusted for counties that were ever swing counties  
        med_col1_swing_ind_swing, 
        type = "text", 
        #omit fixed effects for state from output bc it looks bad
        omit = c('stateabbr', 'x', 'I(x^2)'),
        dep.var.caption = "Change in democratic vote from 2012 to 2016",
        covariate.labels = c(
           "Opioid increase", 
           "Medicaid expansion", 
           "Always republican",
           "Always democrat",
           "Swing county",
           #"Distance measure", 
           #"Polynomial",
          "Democratic vote 2004", 
          "Opioid increase X Medicaid expansion",
          "Opioid increase X always Republican", 
          "Medicaid expansion X always republican",
          "Opioid increase X always Democrat", 
          "Medicaid expansion X always Democrat",
          "Opioid increase X swing county", 
          "Medicaid expansion X swing county",
          "Opioid increase X Medicaid expansion X always republican",
          "Opioid increase X Medicaid expansion X always democrat",
          "Opioid increase X Medicaid expansion X swing county") ,
         column.labels = c("Original paper regression", "Always Republican counties", "Always Democrat", "Ever swing counties"),
         dep.var.labels = "Democratic vote share in 2016 minus Democratic vote share in 2012",
         title="Table 1. Comparing original analysis with extension work",
         #change order of rows slightly so that relevant rows are next to eachother for readability
        # order=c(1,2,3,4,5,6,7,8,10,12,9,11,13,14),
         digits=3, 
         out="draft1.htm"
         )


```










